#delimit;
version 13;
set more off;
program drop _all;
  quietly log;
  local logon = r(status);
  if "`logon'" == "on" {; log close; };
log using table1.log, text replace;


/*	********************************************************************************	*/
/*     	File Name: 	table1.do								*/
/*     	Date:   	May 19, 2015			 					*/
/*      Author: 	Frederick J. Boehmke and Olga Chyzh					*/
/*      Purpose:	Run logit models to replicate Maoz et al. (2006), then read in 		*/
/*			multiply imputed coefficents from the ERGM run in R and create final 	*/
/*			estimates to output table 1.						*/
/*      Input File:	jcr2006replication.dta,							*/
/*      Input File:	maoz-gleditsch1948.dta,							*/
/*			table1-ergm-base.txt,							*/
/*			table1-ergm-ivnetworkX.txt,						*/
/* 			table1-ergm-ivnetworkX.txt,						*/
/*			table1-ergm-ivscoreX.txt						*/
/*      Output File:	table1.log,								*/
/*			table1.dta,								*/
/*			table1.txt								*/
/*	Requires:	estout (Stata package)							*/
/*	********************************************************************************	*/


set seed 54;
program drop _all;

	/********************************************************/
	/* This program takes dyadic data and creates the	*/ 
	/* structural equivalence measure, also in dyadic form. */
	/********************************************************/
	
	
program define seq, rclass;

  syntax varname [if] [in], id1(varname) id2(varname) ;
  
	tempfile ids;
  
	marksample touse;
	
	quietly keep if `touse';
	
	quietly keep *`varlist' `id1' `id2' year;
		
	sort `id1' `id2';
	
	quietly reshape wide *`varlist', i(`id1') j(`id2');
	
	  quietly reshape long `varlist', i(`id1') j(`id2');	
		  
		preserve;
		
		quietly keep `id1' `id2';
		
		  quietly egen i = group(`id1');
		  quietly bysort i: generat j = _n;
		
		  quietly save `ids', replace;
		
		restore;
		
		quietly reshape wide `varlist', i(`id1') j(`id2');
	
	quietly putmata X=(`varlist'*), replace;
	
	clear;
	
	mata: seq(X);
		
	quietly svmat SEQ;

	quietly generat i = _n;
	
	quietly reshape long SEQ, i(i) j(j);
	
		rename SEQ `varlist'_seq;
	
		quietly merge 1:1 i j using `ids';

		quietly drop i j _merge;
		
	capture rename id1 `id1';
	capture rename id2 `id2';

end;


	/********************************************************/
	/* This mata program manipulates the matrices to 	*/ 
	/* calculate the structural equivalence measure. It is  */
	/* called by the seq program above.			*/
	/********************************************************/


capture mata mata drop seq();

mata;

real matrix seq(real matrix X) {;
	
	real matrix ONE, ONES, NUMR, NUMC, X0, CMEAN, RMEAN, CD, RD, XC, XR, NUM, DENOM1, DENOM, SEQ;
	
	ONE = J(cols(X),1,1);
	ONES = J(cols(X),cols(X),1);

	NUMR = rownonmissing(X);
	NUMC = colnonmissing(X)';
	
	X0 = editmissing(X , 0);
	
	CMEAN = colsum(X0)' :/ NUMC;
	RMEAN = rowsum(X0)  :/ NUMR;
	
	XR = J(cols(X),cols(X),.);
	XC = J(cols(X),cols(X),.);

	i=1;

	  while (i <= cols(X)) {;

	  XR[i,.] = editmissing(X[i,.], 0);
	  XC[.,i] = editmissing(X[.,i], 0);

	  i = i + 1;

	  };
	
	CD = XC-ONES*XC*invsym(ONE'*ONE);
	RD = (XR'-ONES*XR'*invsym(ONE'*ONE))';
	
	NUM = CD'*CD + RD*RD';

	DENOM1 = diagonal(CD'*CD) + diagonal(RD*RD');

	DENOM = DENOM1*DENOM1';

	SEQ = NUM :/ sqrt(DENOM);
	
	st_matrix("SEQ", SEQ);
  
  };

end;


	/****************************************************************************************/
	/* This program reads in the bootstrapped results from the ergm/tergm and combines	*/
	/* them across the boostraps. Note that I have reorganized the inputted data so that 	*/
	/* we have one column with everything stacked. You also have to delete the non-numeric 	*/
	/* parts of the data (variables names and such). 					*/
	/****************************************************************************************/

	
program define readin, eclass;

  syntax anything, varnames(namelist) numdraws(integer);
		
	quietly insheet using `anything', comma clear;

	generat order = _n;

	reshape long v, i(order) j(suborder);

	replace v=subinstr(v, "structure(list(coef = structure(c(","",1);
	replace v=subinstr(v, ")","",1);
	
	generat double coef=real(v);
	
	drop if missing(coef);
	
	sort order suborder;
	
	  keep coef;
	
	  local numvars = wordcount("`varnames'");
	
	  egen param = seq(), from(1) block(`numdraws');
	  egen draw = seq(), from(1) to(`numdraws');
	  
	  quietly keep in 1/`=`numdraws'*`numvars'';
	    
	  quietly reshape wide coef, i(draw) j(param);
	  
	  renvars coef1-coef`numvars' \ `varnames';
	  
	  quietly correlate `varnames', cov;
	  
		matrix C = r(C);
		
	  quietly collapse (mean) `varnames';
	  
	  mkmat `varnames',  matrix(b);
	  
		display "";
	  
		ereturn post b C;
		ereturn display, plus neq(1);
		
end;


	/************************************************/
	/* This program combines the estimates from the */
	/* multiply imputed data sets.			*/
	/************************************************/


program define micalc, eclass;

  syntax, PARameters(name) COVariance(name) imputations(integer);

	local b `parameters';
	local C `covariance';
	local m = `imputations';
  
	local names: colnames(`b'1);

	forvalues i=1/`m' {;

	  if `i'==1 {;
		matrix b = `b'1;
		matrix C = `C'1;
		};

	  else {;
  		matrix b = b + `b'`i';
		matrix C = C + `C'`i';
		};
		
	  };

	matrix b = b/`m';
	matrix C = C/`m';
	
	forvalues i=1/`m' {;

	  matrix bb=`b'`i'-b;

	  if `i'==1 {;
		matrix Vb=(bb)'*bb;
		};

	  else matrix Vb = Vb + (bb)'*bb;

	  };

	matrix Vb=Vb/(`m'-1);				/* Covariance adjustment */

	matrix V=C+(1+1/`m')*Vb	;			/* Final covariance   */

	ereturn post b V;
	ereturn display, plus neq(3);
	

end;

	/********************************************************/
	/* Now we open the data, run the model, and calculate 	*/ 
	/* the structural equivalence measure. 			*/
	/********************************************************/

clear;

tempfile data;

save table1, replace emptyok;

		/* Estimate trade equation using altnerate data set that has this variable. */

use maoz-gleditsch1948.dta, clear;

	replace logoil1 = logoil1/1000000; 
	replace logoil2 = logoil2/1000000; 
	
		/* Estimate instrumented trade equation. */
	
regress ln_expab ln_dist ln_gdpa ln_gdpb ln_popa ln_popb logoil1 logoil2, cluster(ccode1);
	
	estimates store trade;
	
	estout trade using table7.txt, replace style(tab) cells("b(fmt(3) star) se(par)")  modelwidth(7) 
		stats(N r2 chi2) starlevels(** 0.05 *** 0.01)
		legend label collabels(none) varlabels(_cons Constant ln_dist  "Distance (logged)" ln_gdpb "GDP pc B (logged)" 
		  ln_popb "Population B (logged)" logoil_gas_valuepop1 "Oil and Gas pc A" 
		  logoil_gas_valuepop2 "Oil and Gas pc B");
	
	keep if !missing(minreg,caprat,distance,atopsecorr,igosecorr);
	
		/* This block of code calculates the adjustment to the SEq variable to reflect rescaling. */
	
	  local sigma = e(rss)/e(df_r);
	
	  predict trade_hat;
	  predict e2_hat, resid;
	  predict stdp, stdp;
	  
	  replace trade_hat = 0 if ccode1==ccode2;
	
	  egen var_trade_hat_ik = var(trade_hat) if ccode1!=ccode2, by(ccode1);
	  egen var_trade_hat_kj = var(trade_hat) if ccode1!=ccode2, by(ccode2);

	  egen var_e2_hat_ik = var(e2_hat) if ccode1!=ccode2, by(ccode1);
	  egen var_e2_hat_kj = var(e2_hat) if ccode1!=ccode2, by(ccode2);
	  
	  save `data', replace;
	  
	    collapse (mean) var_trade_hat_kj var_e2_hat_kj, by(ccode2);
	  
	    rename ccode2 ccode1;
	    rename var_trade_hat_kj var_trade_hat_ki;
	    rename var_e2_hat_kj var_e2_hat_ki;
	    
	    merge 1:m ccode1 using `data';
	    
	    drop _merge;
	  
	  save `data', replace;
	  
	    collapse (mean) var_trade_hat_ik var_e2_hat_ik, by(ccode1);
	  
	    rename ccode1 ccode2;
	    rename var_trade_hat_ik var_trade_hat_jk;
	    rename var_e2_hat_ik var_e2_hat_jk;
	    
	    merge 1:m ccode2 using `data';
	    
	    drop _merge;

	  generat ratio = sqrt((var_trade_hat_ik + var_trade_hat_ki + var_e2_hat_ik + var_e2_hat_ki)*(var_trade_hat_jk + var_trade_hat_kj 
		+ var_e2_hat_jk + var_e2_hat_kj)/((var_trade_hat_ik + var_trade_hat_ki)*(var_trade_hat_jk + var_trade_hat_kj)));
	  
		/* Generate the SEq of trade instrument following Kalejian. */
		
	preserve;

	egen uniqid = group(ccode1 ccode2);
	
	tsset uniqid year;
		
	foreach var of varlist ln_dist ln_gdpa ln_gdpb ln_popa ln_popb logoil1 logoil2 {;

	  gen `var'_sq=`var'^2;
	  gen `var'_cube=`var'^3;

	  };
		
	regress tradesecorr ln_dist ln_gdpa ln_gdpb ln_popa ln_popb logoil1 logoil2
		ln_dist_sq ln_gdpa_sq ln_gdpb_sq ln_popa_sq ln_popb_sq logoil1_sq logoil2_sq
		ln_dist_cube ln_gdpa_cube ln_gdpb_cube ln_popa_cube ln_popb_cube logoil1_cube logoil2_cube
		if ccode1!=ccode2 & year>1959 & year<1997 & !missing(ln_expab, ln_dist, ln_gdpa, ln_gdpb, ln_popa, ln_popb, logoil1, logoil2);
		
	predict trade_seq_hat, xb;
	predict trade_seq_hat_sd, stdp;

	  forvalues i=1/5 {;

		generat trade_seq_hat`i' = trade_seq_hat + trade_seq_hat_sd*invnorm(uniform());
		
		};
		
	keep ccode1 ccode2 year trade_seq_hat*;
		
	save `data', replace;
		
	restore;
		
	merge m:1 ccode1 ccode2 year using `data', assert(match) keep(match) generat(_merge_trade_seq_hat) keepusing(trade_seq_hat*);
		
		/* Just keep the variables we need. */
	
	keep id1 id2 expab ln_expab ln_dist ln_gdpa ln_gdpb ln_popa ln_popb logoil1 logoil2 ccode* year rgdpca
		dichmid20 minreg302 caprat301 distance tradesecorr atopsecorr igosecorr integcorrse midspline1 
		midspline2 midspline3 polrel dyad ratio trade_seq_hat*;
		
	compress;
	
		/* Now we calculate the SEq instrument with multiple imputation. */

	estimates restore trade; 
	
	save `data', replace;
			
	forvalues draw=1/5 {;
	
	  use `data', clear;
	
	  quietly predict trade_yhat;
	  quietly predict trade_sd, stdp;
	  
	  quietly replace trade_yhat = trade_yhat + trade_sd*invnorm(uniform());
	
	  quietly replace trade_yhat = exp(trade_yhat);
	
		display "";
		noisily _dots 0, title(Calculating SEQ by year) reps(40);
		  
		forvalues year = 1960/1999 {;
		
		preserve;
		
			quietly keep if year==`year';
			
		/*Calculating diagonal entries*/
			
			quietly {;
			
			  egen sum_yhat=total(trade_yhat), by(ccode1);
			  gen diag_trade_yhat=(1-sum_yhat)/rgdpca;
			  keep ccode1 ccode2 year trade_yhat diag_trade_yhat;

				/* These countries are on only one side of trade and so we don't get a square matrix. */
 
			if year == 1974 {; 
			  drop if ccode1==31; 
			  drop if ccode2==53; 
			  };
			 
				/* Double reshape ensures square matrix. */
			 
			  reshape wide *trade_yhat, i(ccode1) j(ccode2);
			  reshape long trade_yhat diag_trade_yhat, i(ccode1) j(ccode2);	
			  
			  replace trade_yhat = diag_trade_yhat if ccode1==ccode2;
			  
			  };

			seq trade_yhat, id1(ccode1) id2(ccode2);
			
			quietly {;
			
			  generat year = `year';
			
			  append using table1;
			 
			  save table1, replace;
			
			  };
			
			restore;
			
			noisily _dots `year' 0;
			
			};

		quietly {;
		
		  merge 1:1 ccode1 ccode2 year using table1;
		  
		  replace trade_yhat_seq = trade_yhat_seq/ratio;
		  
		  rename trade_yhat_seq trade_yhat_seq`draw';
		  
		  saveold draw`draw', replace;

		  clear;

		  save table1, replace emptyok;
		  
		  };
			
	};
			
		/* Now generate the data using the observed trade variable. */

use `data', clear;

generat trade_y = expab;

	display "";
	noisily _dots 0, title(Calculating SEQ by year) reps(40);
	  
	forvalues year = 1960/1999 {;
	
	preserve;
	
		quietly {;
		
		  keep if year==`year';
		
		/*Calculating diagonal entries*/
		
		  egen sum_y = total(trade_y), by(ccode1);
		  gen diag_trade_y=(1-sum_y)/rgdpca;
		  keep ccode1 ccode2 year trade_y diag_trade_y;
			 
		  reshape wide *trade_y, i(ccode1) j(ccode2);
		  reshape long trade_y diag_trade_y, i(ccode1) j(ccode2);	
		  replace trade_y = diag_trade_y if ccode1==ccode2;
		  
		  };
		
		seq trade_y, id1(ccode1) id2(ccode2);
		
		quietly {;
		
		  generat year = `year';
		
		  append using table1;
		 
		  save table1, replace;
		
		  restore;
		
		  };
		
		noisily _dots `year' 0;
		
		};

	  merge 1:1 ccode1 ccode2 year using table1;
	 
	  saveold draw0, replace;

	  erase table1.dta;

		/* Now merge the SEq and trade variables back into the original data sets. */

use jcr2006replication, clear;

  generat ccode1 = statea; 
  generat ccode2 = stateb;

  merge 1:1 ccode1 ccode2 year using draw0, keep(match) generat(_merge_mg) 
	keepusing(trade_y trade_y_seq trade_seq_hat* ln_expab ln_dist ln_gdpa ln_gdpb ln_popa ln_popb logoil1 logoil2);
  
  save table1, replace;
  
forvalues i=1/5 {;
  
	  merge 1:1 ccode1 ccode2 year using draw`i', keep(match) generat(_merge_mg`i') 
		keepusing(trade_yhat_seq`i');
	  
	  compress;
	  
	  save table1, replace;

	};
	
		/* Generate time variables. */
	
generat t1 = year - 1960;
generat t2 = t1^2;
generat t3 = t1^3;

		/* Create lagged variables. */

xtset dyad year;

foreach var of varlist trade_yhat_seq* trade_seq_hat* minreg302 caprat301 distance atopsecorr igosecorr tradesecorr integcorrse 
	ln_dist ln_gdpa ln_gdpb ln_popa ln_popb logoil1 logoil2 {;

  generate L`var'=l.`var';
  
  };
	
  drop if year < 1960 | year > 1996;

		/* Keep observations with trade fully observed. */
  
  drop if missing(ln_expab, ln_dist, ln_gdpa, ln_gdpb, ln_popa, ln_popb, logoil1, logoil2);

  compress;
  
  save table1, replace;
   
		/* Clean up temporary data sets. */
	   
	erase draw0.dta;
	erase draw1.dta;
	erase draw2.dta;
	erase draw3.dta;
	erase draw4.dta;
	erase draw5.dta;


	/********************************/
	/* Run the logit models (1-4).	*/
	/********************************/

		/* Run the exact replication (Model 1). */
	
use jcr2006replication.dta, clear;

  drop if dyad==.;

  tsset dyad year;

  logit dichmid20 L.tradesecorr  L.minreg302 L.caprat301 L.distance L.atopsecorr L.igosecorr peaceyrs midspline1 
	midspline2 midspline3 if statea!=stateb;

	estimates store original_alld;
	
		/* Now read in the data we created with merged variables. */
	
version 10;

use table1, clear;		
		
		/* Run the base model on the restricted sample (Model 2). */
	
version 10: logit dichmid20 Ltradesecorr Lminreg302 Lcaprat301 Ldistance Latopsecorr Ligosecorr peaceyrs t1 t2 t3 if !missing(trade_seq_hat1);

  estimates store observed_alld_maozseq2;
	  
		/* Run the instrumented network (Model 3). */
		/* Loop over the multiply imputed values.  */

nois _dots 0, title(Running model on each imputed data set) reps(5);

  forvalues i=1/5 {;
  
  logit dichmid20 Ltrade_yhat_seq`i' Lminreg302 Lcaprat301 Ldistance Latopsecorr Ligosecorr peaceyrs t1 t2 t3 if !missing(Ltrade_seq_hat1);

  	matrix Q`i' = e(b);
  	matrix W`i' = e(V);
	local  ll`i' = e(ll);
	
	nois _dots `i' 0;
	
  	};

		/* Run micalc to combine the estimates and store them. */
	
	micalc, parameters(Q) covariance(W) imputations(5);
	
		/* -estout- seems to be necessary to get it to -estimates store-. */
	
	estout, cells(b se(par));
	estimates store maoz_mi_alld2;	

		/* Run the instrumented score (Model 4). */

nois _dots 0, title(Running model on each imputed data set) reps(5);

  forvalues i=1/5 {;
  
  logit dichmid20 Ltrade_seq_hat`i' Lminreg302 Lcaprat301 Ldistance Latopsecorr Ligosecorr peaceyrs t1 t2 t3;

  	matrix Q`i' = e(b);
  	matrix W`i' = e(V);
	local  ll`i' = e(ll);
	
	nois _dots `i' 0;
	
  	};

	micalc, parameters(Q) covariance(W) imputations(5);
	
		/* -estout- seems to be necessary to get it to -estimates store-. */
	
	estout, cells(b se(par));
	estimates store ivseq_alld2;	
		
	
	/**********************************************************/
	/* Now read in the various data sets and get the results. */
	/* These pull in the previously created R results for the */
	/* ERGM models and combine them across bootstraps and	  */
	/* imputations to store in Stata and combine in the table.*/
	/**********************************************************/

		/* These are the base models for comparison to Cranmer and Desmarais. */
	
readin table1-ergm-base.txt, varnames(Intercept Lminreg302 Lcaprat301 Ldistance Latopsecorr Ligosecorr Ltradesecorr 
	t1 t2 t3 peaceyrs tchange tschange) numdraws(1000);
	
	estout, cells(b se(par));
	estimates store base;

		/* Now read in the 5 imputed models. */
		/* Start with the instrumented network. */
	
forvalues i=1/5 {;

  readin table1-ergm-ivnetwork`i'.txt, varnames(Intercept Lminreg302 Lcaprat301 Ldistance Latopsecorr Ligosecorr 
	Ltrade_yhat_seq t1 t2 t3 peaceyrs tchange tschange) numdraws(1000);
	
	matrix b`i'=e(b);
	matrix C`i'=e(V);
	
	};
	
	micalc, parameters(b) covariance(C) imputations(5);
	
	  estout, cells(b se(par));
	  estimates store ivnetwork;

		/* Now the 5 imputed results for the instrumented score. */
		
forvalues i=1/5 {;

  readin table1-ergm-ivscore`i'.txt, varnames(Intercept Lminreg302 Lcaprat301 Ldistance Latopsecorr Ligosecorr 
	Ltrade_seq_hat t1 t2 t3 peaceyrs tchange tschange) numdraws(1000);
	
	matrix b`i'=e(b);
	matrix C`i'=e(V);
	
	};
	
	micalc, parameters(b) covariance(C) imputations(5);
	
	  estout, cells(b se(par));
	  estimates store ivscore;

	  
	/*****************/  
	/* Make Table 1. */
	/*****************/  

	
estout original_alld observed_alld_maozseq2 maoz_mi_alld2 ivseq_alld2 base ivnetwork ivscore using table1.txt, replace style(tab)
	cells(b(fmt(3) star) se(par))
	modelwidth(8) varwidth(40)
	mlabels("All dyads" "Naive" "Instr. Net." "Instr. Score" "Naive" "Instr. Net." "Instr. Score")
	mgroups("Logit" "ERGM", pattern(1 0 0 0 1 0 0))
	stat(N, fmt(%9.0f))
	starlevels( * 0.05 ** 0.01) legend
	rename(Intercept _cons Lminreg302 L.minreg302 Lcaprat301 L.caprat301 Ldistance L.distance Latopsecorr L.atopsecorr 
	  Ltradesecorr L.tradesecorr Ligosecorr L.igosecorr Ltrade_yhat_seq5 L.trade_yhat_seq Ltrade_seq_hat5 L.trade_seq_hat
	  Ltrade_yhat_seq L.trade_yhat_seq Ltrade_seq_hat L.trade_seq_hat)
	order(L.tradesecorr L.trade_yhat_seq L.trade_seq_hat L.minreg302 L.caprat301 L.distance L.atopsecorr L.igosecorr)
	collabels(, none)
	varlabels(L.tradesecorr "Trade Equivalence (Maoz et al. 2006)"
	  L.trade_yhat_seq	"Trade Equivalence (Using Trade IV)"
	  L.trade_seq_hat	"Trade Equivalence (Instrumented Score)"
	  L.caprat301		"Capability Ratio"
	  L.minreg302		"Minimum Regime Score"
	  L.distance		"Distance"
	  L.atopsecorr		"Alliance Equivalence"
	  L.igosecorr		"IGO Equivalence"
	  tschange		"Two Stars"
	  tchange		"Triads"
	  _cons           	"Constant");
	

log close;
clear;
exit;
